import glob
import os

import jpype
from jpype import *
# Collect every Tika jar and normalise Windows backslashes to forward slashes.
l = [jar.replace('\\', '/') for jar in glob.glob('c:/tika/*.jar')]

# Classpath string built from the jars. os.pathsep is used because ':' is not
# a usable separator on Windows, where each path already contains a
# drive-letter colon. This value is not passed to the JVM below.
class_path = os.pathsep.join(l)

# The JVM picks the jars up through the extension directory instead.
# NOTE: the extension mechanism (java.ext.dirs) was removed in Java 9; on
# newer JVMs pass "-Djava.class.path=" + class_path instead.
szarg = "-Djava.ext.dirs=c:/tika"
startJVM(jpype.getDefaultJVMPath(), "-ea", szarg)

# Handle on the root Tika Java package; i() reaches tika.sax and
# tika.metadata through it.
tika = JPackage('org.apache.tika')
# Handle on the parser sub-package, used just below to build the parser.
P = JPackage('org.apache.tika.parser')



# Single module-level AutoDetectParser instance, reused by every i() call.
p = P.AutoDetectParser()


# glob is already imported at the top of the file; this repeat is harmless.
import glob
# Rebinds `l` (previously the jar list) to every name containing a dot in the
# Dropbox folder. In the visible code only the disabled loop near the end of
# the file refers to it.
l = glob.glob(r'C:\Documents and Settings\fingul\My Documents\Dropbox\*.*')


def i(s):
    f = java.io.FileInputStream(s)
    nlimit = -1
    c = tika.sax.BodyContentHandler(nlimit)
    m = tika.metadata.Metadata()    
    p.parse(f,c,m)
    s = c.toString()
    #s = s.encode('utf8')
    s = s.encode('euckr','ignore')
    print 'CONTENT=',s
    for szname in m.names():
        print szname,m.get(szname)    
# Disabled batch loop, kept as a bare string literal so it is never executed.
# As written it would not run anyway: the `for` header is missing its loop
# variable.
'''
for  in l:
    print i

    print '-'*20
'''
# Extract and print a single hard-coded document.
i('/0.docx')
pass